import sys
sys.path.append('../ML/')
import pandas as pd
years = [year for year in range(2018, 2023)] # start year to 2022
location = '臺中'
dfs = []
for year in years:
df = pd.read_csv(f'./concat-data/{location}/{year}.csv')
dfs.append(df)
df = pd.concat(dfs, ignore_index=True) # all data from 2018 to 2022
# df = clean(df) # clean df, e.g., nan handling, rename columns, ...
df
| egg_TaijinPrice | Duckegg_TNN_TaijinPrice | 入中雞雛數 | 產蛋隻數 | 均 日 產蛋箱數 | 淘汰隻數 | 目 前 換羽隻數 | Date | WhiteChickQuantity | RedChickQuantity | ImportChickenQuantity | 玉米粒 | 黃豆粉 | 玉米粉 | 脫殼豆粉 | 高蛋白豆粉 | StationName | AirTemperatureMean | AirTemperatureMax | Precipitation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 2018-01-01 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.65 | 13.3 | 13.21 | 臺中 | 17.3 | 27.2 | 0.0 |
| 1 | 24.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 2018-01-02 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.65 | 13.3 | 13.21 | 臺中 | 17.3 | 27.2 | 0.0 |
| 2 | 26.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 2018-01-03 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.65 | 13.3 | 13.21 | 臺中 | 17.3 | 27.2 | 0.0 |
| 3 | 26.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 2018-01-04 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.63 | 13.3 | 13.21 | 臺中 | 17.3 | 27.2 | 4.0 |
| 4 | 26.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 2018-01-05 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.63 | 13.3 | 13.20 | 臺中 | 17.3 | 27.2 | 0.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1821 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 2022-12-27 | 387890 | 83670 | NaN | 10.85 | 19.90 | 11.60 | 21.1 | 21.00 | 臺中 | 17.8 | 28.1 | 0.0 |
| 1822 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 2022-12-28 | 387890 | 83670 | NaN | 11.05 | 19.90 | 11.68 | 21.1 | 21.00 | 臺中 | 17.8 | 28.1 | 0.0 |
| 1823 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 2022-12-29 | 353700 | 91500 | NaN | 11.05 | 19.90 | 11.78 | 21.1 | 21.00 | 臺中 | 17.8 | 28.1 | T |
| 1824 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 2022-12-30 | 353700 | 91500 | NaN | 11.20 | 19.90 | 11.78 | 21.1 | 21.00 | 臺中 | 17.8 | 28.1 | 0.0 |
| 1825 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 2022-12-31 | 353700 | 91500 | NaN | 11.20 | 19.90 | 11.78 | 21.1 | 21.00 | 臺中 | 17.8 | 28.1 | 0.0 |
1826 rows × 20 columns
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df = df.drop(columns=['StationName'])
# rename columns
new_columns = {
'egg_TaijinPrice':"EggPrice",
'Duckegg_TNN_TaijinPrice': "DuckEggPrice",
'入中雞雛數':"NumRuChung",
'產蛋隻數':"NumOfChickLayingEggs",
'均 日 產蛋箱數':"BoxesOfEggsPerDay",
'淘汰隻數':"NumDisuse",
'目 前 換羽隻數':"NumMoulting",
'WhiteChickQuantity':"WhiteChickQuantity",
'RedChickQuantity':"RedChickQuantity",
'ImportChickenQuantity':"ImportChickenQuantity",
' 玉米粒':"Corn",
' 黃豆粉 ':"SoyBeanFlour",
'玉米粉 ':"CornFlour",
'脫殼豆粉':"DehulledBeanFlour",
'高蛋白豆粉':"HighProteinBeanFlour",
'AirTemperatureMean':"AirTemperatureMean",
'AirTemperatureMax':"AirTemperatureMax",
'Precipitation':"Precipitation"
}
df = df.rename(columns=new_columns)
# 將所有資料轉成數字,不是數字的資料會變成 NaN
df = df.apply(pd.to_numeric, errors='coerce')
# 空值全部填 0
df = df.fillna(float(0))
df
| EggPrice | DuckEggPrice | NumRuChung | NumOfChickLayingEggs | BoxesOfEggsPerDay | NumDisuse | NumMoulting | WhiteChickQuantity | RedChickQuantity | ImportChickenQuantity | Corn | SoyBeanFlour | CornFlour | DehulledBeanFlour | HighProteinBeanFlour | AirTemperatureMean | AirTemperatureMax | Precipitation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||||||||||
| 2018-01-01 | 24.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.65 | 13.3 | 13.21 | 17.3 | 27.2 | 0.0 |
| 2018-01-02 | 24.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.65 | 13.3 | 13.21 | 17.3 | 27.2 | 0.0 |
| 2018-01-03 | 26.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.65 | 13.3 | 13.21 | 17.3 | 27.2 | 0.0 |
| 2018-01-04 | 26.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.63 | 13.3 | 13.21 | 17.3 | 27.2 | 4.0 |
| 2018-01-05 | 26.5 | 34.0 | 1160830.0 | 31170982.0 | 114908.0 | 866824.0 | 633700.0 | 245600 | 84800 | 0.0 | 6.10 | 12.15 | 6.63 | 13.3 | 13.20 | 17.3 | 27.2 | 0.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-12-27 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 387890 | 83670 | 0.0 | 10.85 | 19.90 | 11.60 | 21.1 | 21.00 | 17.8 | 28.1 | 0.0 |
| 2022-12-28 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 387890 | 83670 | 0.0 | 11.05 | 19.90 | 11.68 | 21.1 | 21.00 | 17.8 | 28.1 | 0.0 |
| 2022-12-29 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 353700 | 91500 | 0.0 | 11.05 | 19.90 | 11.78 | 21.1 | 21.00 | 17.8 | 28.1 | 0.0 |
| 2022-12-30 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 353700 | 91500 | 0.0 | 11.20 | 19.90 | 11.78 | 21.1 | 21.00 | 17.8 | 28.1 | 0.0 |
| 2022-12-31 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 353700 | 91500 | 0.0 | 11.20 | 19.90 | 11.78 | 21.1 | 21.00 | 17.8 | 28.1 | 0.0 |
1826 rows × 18 columns
df_shift = df.loc['2018-07-01':'2022-12-31']
df_shift
| EggPrice | DuckEggPrice | NumRuChung | NumOfChickLayingEggs | BoxesOfEggsPerDay | NumDisuse | NumMoulting | WhiteChickQuantity | RedChickQuantity | ImportChickenQuantity | Corn | SoyBeanFlour | CornFlour | DehulledBeanFlour | HighProteinBeanFlour | AirTemperatureMean | AirTemperatureMax | Precipitation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||||||||||
| 2018-07-01 | 27.5 | 32.0 | 1548978.0 | 29964977.0 | 108553.0 | 861380.0 | 687628.0 | 372700 | 42600 | 21060.0 | 6.70 | 13.25 | 7.32 | 14.35 | 14.21 | 28.6 | 35.4 | 13.0 |
| 2018-07-02 | 27.5 | 32.0 | 1548978.0 | 29964977.0 | 108553.0 | 861380.0 | 687628.0 | 372700 | 42600 | 21060.0 | 6.70 | 13.25 | 7.32 | 14.35 | 14.21 | 28.6 | 35.4 | 67.5 |
| 2018-07-03 | 27.5 | 32.0 | 1548978.0 | 29964977.0 | 108553.0 | 861380.0 | 687628.0 | 372700 | 42600 | 21060.0 | 6.70 | 13.20 | 7.30 | 14.30 | 14.14 | 28.6 | 35.4 | 46.0 |
| 2018-07-04 | 27.5 | 34.0 | 1548978.0 | 29964977.0 | 108553.0 | 861380.0 | 687628.0 | 372700 | 42600 | 21060.0 | 6.70 | 13.15 | 7.28 | 14.30 | 14.11 | 28.6 | 35.4 | 31.0 |
| 2018-07-05 | 27.5 | 34.0 | 1548978.0 | 29964977.0 | 108553.0 | 861380.0 | 687628.0 | 372700 | 42600 | 21060.0 | 6.65 | 13.15 | 7.23 | 14.30 | 14.11 | 28.6 | 35.4 | 13.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-12-27 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 387890 | 83670 | 0.0 | 10.85 | 19.90 | 11.60 | 21.10 | 21.00 | 17.8 | 28.1 | 0.0 |
| 2022-12-28 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 387890 | 83670 | 0.0 | 11.05 | 19.90 | 11.68 | 21.10 | 21.00 | 17.8 | 28.1 | 0.0 |
| 2022-12-29 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 353700 | 91500 | 0.0 | 11.05 | 19.90 | 11.78 | 21.10 | 21.00 | 17.8 | 28.1 | 0.0 |
| 2022-12-30 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 353700 | 91500 | 0.0 | 11.20 | 19.90 | 11.78 | 21.10 | 21.00 | 17.8 | 28.1 | 0.0 |
| 2022-12-31 | 40.5 | 43.0 | 1592549.0 | 35037318.0 | 122636.0 | 1203140.0 | 990135.0 | 353700 | 91500 | 0.0 | 11.20 | 19.90 | 11.78 | 21.10 | 21.00 | 17.8 | 28.1 | 0.0 |
1645 rows × 18 columns
from datetime import datetime, timedelta
days_befores = [30, 60, 90, 120, 150, 180]
features_to_extend = [
'NumRuChung',
'NumOfChickLayingEggs',
'BoxesOfEggsPerDay',
'NumDisuse',
'NumMoulting',
'WhiteChickQuantity',
'RedChickQuantity',
'ImportChickenQuantity',
'Corn',
'SoyBeanFlour',
'CornFlour',
'DehulledBeanFlour',
'HighProteinBeanFlour',
]
features_not_extended = [elem for elem in df_shift.columns if elem not in features_to_extend]
extend_columns = {feature: {day: [] for day in days_befores} for feature in features_to_extend}
def date_days_before(input_date, feature):
# Convert input_date to a datetime object
input_datetime = datetime.strptime(input_date, "%Y-%m-%d")
for days_before in days_befores:
result_datetime = input_datetime - timedelta(days=days_before) # Calculate some days before
past_date = result_datetime.strftime("%Y-%m-%d") # Format and return the result as a string
past_data = df.loc[past_date][feature] # find that data
extend_columns[feature][days_before].append(past_data)
for idx in df_shift.index:
date = str(idx)[:10] # idx = 2022-10-15 00:00:00, we only need the date
for feat in extend_columns.keys():
date_days_before(date, feat)
df_extended = df_shift.copy(deep=True)
extended_columns = [] # for plotting
for feat, dict in extend_columns.items():
for days, list in dict.items():
extend_col_name = f'{feat}_{int(days/30)}'
df_extended[extend_col_name] = list
extended_columns.append(extend_col_name)
df_shift = df_extended
# remove the original columns of the extended ones
df_shift = df_shift.drop(features_to_extend, axis=1)
df_shift
| EggPrice | DuckEggPrice | AirTemperatureMean | AirTemperatureMax | Precipitation | NumRuChung_1 | NumRuChung_2 | NumRuChung_3 | NumRuChung_4 | NumRuChung_5 | ... | DehulledBeanFlour_3 | DehulledBeanFlour_4 | DehulledBeanFlour_5 | DehulledBeanFlour_6 | HighProteinBeanFlour_1 | HighProteinBeanFlour_2 | HighProteinBeanFlour_3 | HighProteinBeanFlour_4 | HighProteinBeanFlour_5 | HighProteinBeanFlour_6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2018-07-01 | 27.5 | 32.0 | 28.6 | 35.4 | 13.0 | 1352635.0 | 1555965.0 | 1305120.0 | 1445970.0 | 1017671.0 | ... | 14.8 | 14.4 | 13.4 | 13.3 | 14.75 | 14.76 | 14.70 | 14.16 | 13.38 | 13.21 |
| 2018-07-02 | 27.5 | 32.0 | 28.6 | 35.4 | 67.5 | 1352635.0 | 1555965.0 | 1305120.0 | 1445970.0 | 1017671.0 | ... | 14.8 | 14.5 | 13.4 | 13.3 | 14.75 | 14.73 | 14.67 | 14.25 | 13.35 | 13.21 |
| 2018-07-03 | 27.5 | 32.0 | 28.6 | 35.4 | 46.0 | 1352635.0 | 1555965.0 | 1305120.0 | 1445970.0 | 1017671.0 | ... | 14.8 | 14.5 | 13.4 | 13.3 | 14.75 | 14.78 | 14.67 | 14.25 | 13.35 | 13.21 |
| 2018-07-04 | 27.5 | 34.0 | 28.6 | 35.4 | 31.0 | 1352635.0 | 1555965.0 | 1305120.0 | 1445970.0 | 1017671.0 | ... | 14.8 | 14.5 | 13.4 | 13.3 | 14.75 | 14.78 | 14.67 | 14.40 | 13.33 | 13.20 |
| 2018-07-05 | 27.5 | 34.0 | 28.6 | 35.4 | 13.0 | 1352635.0 | 1555965.0 | 1305120.0 | 1445970.0 | 1017671.0 | ... | 14.8 | 14.5 | 13.4 | 13.3 | 14.75 | 14.70 | 14.67 | 14.40 | 13.33 | 13.20 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-12-27 | 40.5 | 43.0 | 17.8 | 28.1 | 0.0 | 1547765.0 | 1482554.0 | 1912212.0 | 1654430.0 | 1642580.0 | ... | 20.5 | 19.9 | 19.2 | 23.6 | 20.90 | 20.45 | 20.45 | 19.73 | 18.70 | 18.70 |
| 2022-12-28 | 40.5 | 43.0 | 17.8 | 28.1 | 0.0 | 1547765.0 | 1482554.0 | 1912212.0 | 1654430.0 | 1642580.0 | ... | 20.5 | 19.9 | 19.2 | 18.7 | 20.90 | 20.50 | 20.45 | 19.73 | 18.70 | 18.70 |
| 2022-12-29 | 40.5 | 43.0 | 17.8 | 28.1 | 0.0 | 1547765.0 | 1482554.0 | 1912212.0 | 1654430.0 | 1654430.0 | ... | 20.5 | 19.9 | 11.7 | 18.7 | 20.90 | 20.60 | 20.45 | 19.73 | 19.55 | 18.70 |
| 2022-12-30 | 40.5 | 43.0 | 17.8 | 28.1 | 0.0 | 1547765.0 | 1482554.0 | 1482554.0 | 1912212.0 | 1654430.0 | ... | 20.5 | 19.9 | 11.6 | 18.7 | 21.00 | 20.50 | 20.45 | 19.73 | 19.55 | 18.70 |
| 2022-12-31 | 40.5 | 43.0 | 17.8 | 28.1 | 0.0 | 1592549.0 | 1547765.0 | 1482554.0 | 1912212.0 | 1654430.0 | ... | 20.5 | 19.9 | 11.5 | 18.7 | 21.10 | 20.60 | 20.45 | 19.73 | 19.35 | 18.70 |
1645 rows × 83 columns
"""
Highly relative feature list: (not extended)
DuckEggPrice 0.726912
NumRuChung 0.435494
Corn 0.592012
SoyBeanFlour 0.635711
CornFlour 0.620423
DehulledBeanFlour 0.547041
HighProteinBeanFlour 0.639172
"""
'\nHighly relative feature list: (not extended)\nDuckEggPrice 0.726912\nNumRuChung 0.435494\nCorn 0.592012\nSoyBeanFlour 0.635711\nCornFlour 0.620423\nDehulledBeanFlour 0.547041\nHighProteinBeanFlour 0.639172\n'
from ML.preprocessing import feature_selection
df_shift_fs, selectedFeatures, targetCorr = feature_selection(df_shift, type='corr')
df_shift_fs
Number of selected features: 31 Highly relative feature list: DuckEggPrice 0.731987 Corn_1 0.583691 Corn_2 0.625401 Corn_3 0.631921 Corn_4 0.660566 Corn_5 0.671084 Corn_6 0.670482 SoyBeanFlour_1 0.591983 SoyBeanFlour_2 0.613008 SoyBeanFlour_3 0.590756 SoyBeanFlour_4 0.570220 SoyBeanFlour_5 0.551340 SoyBeanFlour_6 0.529676 CornFlour_1 0.606901 CornFlour_2 0.640753 CornFlour_3 0.657381 CornFlour_4 0.683974 CornFlour_5 0.684478 CornFlour_6 0.682916 DehulledBeanFlour_1 0.524395 DehulledBeanFlour_2 0.555826 DehulledBeanFlour_3 0.582212 DehulledBeanFlour_4 0.608746 DehulledBeanFlour_5 0.639264 DehulledBeanFlour_6 0.669838 HighProteinBeanFlour_1 0.591559 HighProteinBeanFlour_2 0.615984 HighProteinBeanFlour_3 0.591543 HighProteinBeanFlour_4 0.567360 HighProteinBeanFlour_5 0.544886 HighProteinBeanFlour_6 0.521926 Name: EggPrice, dtype: float64
| DuckEggPrice | Corn_1 | Corn_2 | Corn_3 | Corn_4 | Corn_5 | Corn_6 | SoyBeanFlour_1 | SoyBeanFlour_2 | SoyBeanFlour_3 | ... | DehulledBeanFlour_4 | DehulledBeanFlour_5 | DehulledBeanFlour_6 | HighProteinBeanFlour_1 | HighProteinBeanFlour_2 | HighProteinBeanFlour_3 | HighProteinBeanFlour_4 | HighProteinBeanFlour_5 | HighProteinBeanFlour_6 | EggPrice | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2018-07-01 | 32.0 | 7.30 | 7.05 | 8.05 | 6.90 | 6.23 | 6.10 | 13.80 | 13.9 | 13.7 | ... | 14.4 | 13.4 | 13.3 | 14.75 | 14.76 | 14.70 | 14.16 | 13.38 | 13.21 | 27.5 |
| 2018-07-02 | 32.0 | 7.30 | 7.15 | 8.05 | 7.05 | 6.23 | 6.10 | 13.80 | 13.9 | 13.7 | ... | 14.5 | 13.4 | 13.3 | 14.75 | 14.73 | 14.67 | 14.25 | 13.35 | 13.21 | 27.5 |
| 2018-07-03 | 32.0 | 7.30 | 7.30 | 8.05 | 7.05 | 6.23 | 6.10 | 13.80 | 13.9 | 13.7 | ... | 14.5 | 13.4 | 13.3 | 14.75 | 14.78 | 14.67 | 14.25 | 13.35 | 13.21 | 27.5 |
| 2018-07-04 | 34.0 | 7.30 | 7.30 | 8.05 | 7.10 | 6.23 | 6.10 | 13.80 | 13.9 | 13.7 | ... | 14.5 | 13.4 | 13.3 | 14.75 | 14.78 | 14.67 | 14.40 | 13.33 | 13.20 | 27.5 |
| 2018-07-05 | 34.0 | 7.30 | 7.30 | 8.05 | 7.15 | 6.30 | 6.10 | 13.80 | 13.7 | 13.7 | ... | 14.5 | 13.4 | 13.3 | 14.75 | 14.70 | 14.67 | 14.40 | 13.33 | 13.20 | 27.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2022-12-27 | 43.0 | 11.65 | 11.80 | 11.55 | 11.45 | 10.85 | 12.35 | 19.80 | 19.3 | 19.3 | ... | 19.9 | 19.2 | 23.6 | 20.90 | 20.45 | 20.45 | 19.73 | 18.70 | 18.70 | 40.5 |
| 2022-12-28 | 43.0 | 11.65 | 11.80 | 11.55 | 11.50 | 10.85 | 12.30 | 19.80 | 19.5 | 19.3 | ... | 19.9 | 19.2 | 18.7 | 20.90 | 20.50 | 20.45 | 19.73 | 18.70 | 18.70 | 40.5 |
| 2022-12-29 | 43.0 | 11.65 | 11.80 | 11.55 | 11.40 | 10.75 | 12.30 | 19.80 | 19.5 | 19.3 | ... | 19.9 | 11.7 | 18.7 | 20.90 | 20.60 | 20.45 | 19.73 | 19.55 | 18.70 | 40.5 |
| 2022-12-30 | 43.0 | 11.55 | 11.80 | 11.55 | 11.40 | 10.65 | 12.20 | 19.80 | 19.5 | 19.3 | ... | 19.9 | 11.6 | 18.7 | 21.00 | 20.50 | 20.45 | 19.73 | 19.55 | 18.70 | 40.5 |
| 2022-12-31 | 43.0 | 11.50 | 11.80 | 11.60 | 11.35 | 10.55 | 12.20 | 19.85 | 19.5 | 19.3 | ... | 19.9 | 11.5 | 18.7 | 21.10 | 20.60 | 20.45 | 19.73 | 19.35 | 18.70 | 40.5 |
1645 rows × 32 columns
import matplotlib.pyplot as plt
month_before = 1
data = []
for col in extended_columns:
if month_before <= 6:
data.append((month_before, targetCorr[col]))
month_before += 1
if month_before == 7:
# plot bar chart
keys, values = zip(*data)
plt.bar(keys, values)
plt.xlabel('Months Before')
plt.ylabel('Correlation to Y')
plt.title(col[:-2])
plt.savefig(f'./tempfigs/{col[:-2]}.png')
plt.show()
# reset
data = []
month_before = 1